
* -------------------
* Title: 1_weight.do
* -------------------

* Preface: This is the FIRST of four code files to reproduce all results
* reported in Joo, Elwert, and Munk 2024 "Labor Market Consequences of 
* Grandparenthood" from source data stored on Statistics Denmark servers.
* See ReadMe for details on data access.  This do file has been minimally
* redacted by Statistics Denmark staff to remove identifying information
* in compliance with applicable law. 

* Content: This code produces the weights for the repeated-measures marginal
* structural models reported in the paper. 


* Set up
* ---------------
* Interpret every command below under Stata 17.0 rules.
version 17.0

* Session behaviour: never pause for -more-, create new variables as float
* unless a type is given, and apply the matrix-size setting.
set more off
set type float
set matsize 11000

* Fix the random-number seed.
set seed 12345

* Define macros
* ---------------

* Project directories.
global home   "E:\workdata\704121\Wontak"
global raw    "E:\workdata\704121\Wontak\raw"
global data   "E:\workdata\704121\Wontak\data"
global result "E:\workdata\704121\Wontak\result"

* Plain variable lists. Further down, y, d1 and f1 feed the missing-value
* count m2, while d1, g1, g21, g2 and h feed the missing-value count m1.
* d1 is also the dependent variable of the first two logit models and dc1 is
* used to construct tb.
global y   "g1_empd g1_empdi g1_incomel"
global d1  "d"
global dc1 "dc"
global f1  "female teenb_g1 ager1 ager2 year yearr t1 g"
global g1  "births_t g1_workexp g1_empd g1_empdi g1_incomel g1_yed g1_ced g1_psych g1_ccs g1_dhosp g1_mstatr g1_partner p_empdi p_incomeli p_psych p_ccs p_dhosp"
global g21 "g2_1_partner g2_1_workexp g2_1_empd g2_1_incomel g2_1_yed g2_1_ced g2_1_psych g2_1_ccs g2_1_dhosp"
global g2  "g2 g3 g2_female g2_partner g2_empd g2_incomel g2_yed g2_ced g2_psych g2_ccs g2_dhosp"
global h   "h_incomeg hhsize hh_g2 hh_u18 h_welfare"

* Factor-variable lists with time-series lag operators. The one-period and
* two-period lists differ only in the lag digit, so both are produced by the
* same template: k=1 gives v1_g1, v1_g21, v1_g2, v1_h and k=2 gives the
* corresponding v2_ globals.
forvalues k = 1/2 {
    global v`k'_g1 "L`k'.c.births_t L`k'.c.g1_workexp L`k'.c.g1_empd L`k'.i.g1_empdi L`k'.c.g1_incomel L`k'.c.g1_yed L`k'.i.g1_ced L`k'.c.g1_psych L`k'.c.g1_ccs L`k'.c.g1_dhosp L`k'.i.g1_partner L`k'.i.p_empdi L`k'.i.p_incomeli L`k'.i.p_psych L`k'.i.p_ccs L`k'.i.p_dhosp"
    global v`k'_g21 "L`k'.i.g2_1_partner L`k'.c.g2_1_workexp L`k'.c.g2_1_empd L`k'.c.g2_1_incomel L`k'.c.g2_1_yed L`k'.i.g2_1_ced L`k'.c.g2_1_psych L`k'.c.g2_1_ccs L`k'.c.g2_1_dhosp"
    global v`k'_g2 "L`k'.c.g2 L`k'.c.g3 L`k'.c.g2_female L`k'.c.g2_partner L`k'.c.g2_empd L`k'.c.g2_incomel L`k'.c.g2_yed L`k'.c.g2_ced L`k'.c.g2_psych L`k'.c.g2_ccs L`k'.c.g2_dhosp"
    global v`k'_h "L`k'.c.h_incomeg L`k'.c.hhsize L`k'.c.hh_g2 L`k'.c.hh_u18 L`k'.c.h_welfare"
}

* Factor-variable lists built from the variables carrying the _t1 suffix.
global t1_g1  "c.births_t_t1 c.g1_workexp_t1 c.g1_empd_t1 i.g1_empdi_t1 c.g1_incomel_t1 c.g1_yed_t1 c.g1_psych_t1 c.g1_ccs_t1 c.g1_dhosp_t1 i.g1_mstatr_t1 i.p_empdi_t1 i.p_incomeli_t1 i.p_psych_t1 i.p_ccs_t1 i.p_dhosp_t1"
global t1_g21 "c.g2_1_yed_t1 c.g2_1_psych_t1 c.g2_1_ccs_t1 c.g2_1_dhosp_t1"
global t1_g2  "c.g2_t1 c.g2_female_t1"
global t1_h   "c.h_incomeg_t1 c.hhsize_t1 c.hh_g2_t1 c.hh_u18_t1 c.h_welfare_t1"


* Load and prepare data
* -----------------------
* Purpose of this section: read the analysis file, restrict the sample, declare
* the panel structure, and build the selection flag s, the attrition flag attr
* and the indicator tb that the weight models and later steps rely on.

* Read the input file from the data directory, replacing whatever is in memory.
use $data\data_230115.dta, clear

* drop unnecessary variables
* (all variables with prefix b1_, b2_ or b3_, and all variables ending in _t2)
drop b1_* b2_* b3_* *_t2

* sample setting
* Keep rows with a non-missing t1.
keep if t1<.
* Person-level (pnrr) summaries: first year, lowest ager1, and the lowest and
* highest g1_incomel observed for the person.
bysort pnrr: egen syear=min(year)
bysort pnrr: egen sage=min(ager1)
bysort pnrr: egen sinmin=min(g1_incomel)
bysort pnrr: egen sinmax=max(g1_incomel)
* Keep persons whose lowest ager1 is at least 20, and rows with ager1 up to 64.
* NOTE(review): missing values compare as larger than any number, so a missing
* sage passes the first filter and a missing ager1 fails the second.
keep if sage>=20
keep if ager1<=64
* Drop persons with any negative g1_incomel or any g1_incomel of 1,000,000 or
* more.
* NOTE(review): a person whose g1_incomel is missing in every row has a missing
* sinmax, which also satisfies >=1000000 and is therefore dropped - confirm
* that this is intended.
drop if sinmin<0
drop if sinmax>=1000000
* Declare the panel: pnrr is the unit identifier, t1 the time variable. The
* L. operators in the weight models depend on this declaration.
xtset pnrr t1

* Row-wise counts of missing values: m1 over the d1, g1, g21, g2 and h lists,
* m2 over the y, d1 and f1 lists.
egen m1=rowmiss($d1 $g1 $g21 $g2 $h)
egen m2=rowmiss($y $d1 $f1)
* The subscripts [_n-1], [_n-2], [_n+1], [_n+2] below address neighbouring rows
* within a person, so the within-person order set here matters.
* NOTE(review): this assumes rows ordered by year are also ordered by t1 and
* that a person's rows are consecutive in t1 - confirm.
sort pnrr year
* s=1 marks a row that satisfies all of: d_t1 and d_t11 equal 0, t1 is 3 or
* more, m1 is zero in the two preceding rows, m2 is zero in the current row,
* and none of g1_die, emmi, g2_1_die, d1_d equals 1. Other rows get s missing.
* NOTE(review): the !=1 tests are also true when the variable is missing.
* NOTE(review): d_t11 is not defined in this file; presumably it exists in the
* input data - verify that it is not a typo.
bysort pnrr: gen s=1 if d_t1==0 & d_t11==0 & t1>=3 & t1<. & m1[_n-2]==0 & m1[_n-1]==0 & m2==0 & g1_die!=1 & emmi!=1 & g2_1_die!=1 & d1_d!=1
* From t1 of 4 onward, multiply s by the previous row's s. The replace runs in
* row order, so once s is missing in one row it stays missing in the rows
* after it.
bysort pnrr: replace s=s*s[_n-1] if t1>=4
* attr is 0 on rows with s==1, and 1 on the first row after a row with s==1
* where s is missing.
bysort pnrr: gen attr=0 if s==1
bysort pnrr: replace attr=1 if s[_n-1]==1 & s==.
* attr is also 1 at t1==3 when s is missing there although m1 is zero two rows
* earlier.
bysort pnrr: replace attr=1 if t1==3 & m1[_n-2]==0 & s==.
* Set s=0 on the t1==2 and t1==1 rows that precede a row with s==1, so that
* these rows survive the later keep on non-missing s.
bysort pnrr: replace s=0 if s[_n+1]==1 & t1==2
bysort pnrr: replace s=0 if s[_n+2]==1 & t1==1

* tb starts as t1+12 minus the dc variable, is recoded to 1 for values up to 19
* and to 0 for values of 20 or more, and is then set to 0 wherever d equals 0.
gen tb=t1+12-$dc1
recode tb (min/19=1)(20/max=0)
replace tb=0 if $d1==0

* Compute weights
* ------------------
* Four logit models are fitted: two for d (estimated on rows where the lagged d
* is 0 and s is 1) and two for attr. Within each pair, one model uses the _t1
* covariate lists plus lagged lists and the other uses the _t1 lists only.
* Each model's predicted probability is converted into the probability of the
* value actually observed, cumulated within person, and combined into ratios.

* Building blocks of the model formulas. The globals are expanded when these
* locals are defined, so every logit command below expands to one flat formula
* with the covariates in the order listed here.
local strata  "(i.female#i.g)"
local trend   "(c.t1##c.t1)"
local fixed   "i.ager2 i.yearr i.t1"
local x_base  "i.teenb_g1 $t1_g1 $t1_g21 $t1_g2 $t1_h"
local x_trt   "i.teenb_g1 $t1_g1 $v1_g1 $v2_g1 $t1_g21 $v1_g21 $v2_g21 $t1_g2 $v1_g2 $v2_g2 $t1_h $v1_h $v2_h"
local x_att   "i.teenb_g1 $t1_g1 $v2_g1 $t1_g21 $v2_g21 $t1_g2 $v2_g2 $t1_h $v2_h"
local at_risk "L.$d1==0 & s==1"
local wcap    14

* cross-sectional weights
sort pnrr t1

* Models for d: wdd comes from the specification with one- and two-period
* lagged lists, wdn from the specification with the _t1 lists only.
logit $d1 `strata'##(`trend'##(`x_trt') `fixed') if `at_risk'
predict wdd if e(sample)
logit $d1 `strata'##(`trend'##(`x_base') `fixed') if `at_risk'
predict wdn if e(sample)

* Rows where the lagged d is already 1 (and s is 1) are outside the estimation
* sample and get probability 1. The second replace turns the prediction into
* the probability of the observed value of d: p when d is 1, 1-p when d is 0.
foreach p in wdd wdn {
    replace `p'=1 if L.$d1==1 & s==1
    replace `p'=`p'*$d1+(1-`p')*(1-$d1)
}

* Models for attr: wad comes from the specification with two-period lagged
* lists, wan from the specification with the _t1 lists only. Both include the
* two-period lag of d and carry no if-condition.
logit attr `strata'##(`trend'##(L2.$d1 `x_att') `fixed')
predict wad if e(sample)
logit attr `strata'##(`trend'##(L2.$d1 `x_base') `fixed')
predict wan if e(sample)

* Probability of the observed value of attr.
foreach p in wad wan {
    replace `p'=`p'*attr+(1-`p')*(1-attr)
}

* longitudinal weights
sort pnrr t1

* Running product within person: from t1 of 4 onward, on rows with s==1, each
* probability is multiplied by the (already cumulated) value in the row before.
foreach p in wdd wdn wad wan {
    bysort pnrr: replace `p'=`p'*`p'[_n-1] if s==1 & t1>=4
}

* Ratios of the _t1-only model to the lagged-covariate model, for d (wd) and
* for attr (wa), and their product (wo).
gen wd=wdn/wdd
gen wa=wan/wad
gen wo=wd*wa

* w is wo with values of 14 or more set to 14; smaller and missing values are
* copied unchanged.
recode wo (`wcap'/max=`wcap'), gen(w)

* Retain only rows with a non-missing selection flag s.
keep if s<.

* Save data set containing weights for later use 
* -------------------------------------------------
* Writes the prepared data, including the weight variables, to the data
* directory, overwriting any existing file of the same name.
save $data\msm_230115.dta, replace

* Close the log if one is open. No -log using- appears in this file, so a bare
* -log close- stops with error r(606) when no log is open, which would also
* abort a master do-file calling this one. -capture- suppresses that error and
* still closes a log opened elsewhere.
capture log close
